Create a histogram of diamond prices. Facet the histogram by diamond color and use cut to color the histogram bars.
# Histogram of diamond prices, one panel per diamond color, with each bar
# split by cut.
# `cut` is mapped to `fill` (not `color`): `color` only sets the bar outline,
# and the qualitative palette below is a *fill* scale, so it is only applied
# when the fill aesthetic is mapped.
ggplot(data = diamonds, aes(x = price)) +
  geom_histogram(aes(fill = cut), binwidth = 100) +
  facet_wrap(~ color) +
  scale_fill_brewer(type = "qual")
Create a scatterplot of diamond price vs. table and color the points by the cut of the diamond.
# Scatterplot of price against table, points colored by cut.
# The x-axis is zoomed (not filtered) to 50-70 with a tick at every unit.
ggplot(diamonds, aes(table, price, color = cut)) +
  geom_point() +
  scale_x_continuous(breaks = seq(from = 50, to = 70, by = 1)) +
  coord_cartesian(xlim = c(50, 70))
Create a scatterplot of diamond price vs. volume (x * y * z) and color the points by the clarity of the diamonds. Use a log10 scale on the y-axis for price, and omit the top 1% of diamond volumes from the plot.
# Volume is approximated as the product of the three dimension columns.
diamonds$volume <- diamonds$x * diamonds$y * diamonds$z

# 99th percentile of volume; rows above it are excluded from the plot.
vol99q <- quantile(diamonds$volume, probs = 0.99)

# Price (log10 y-axis) against volume, points colored by clarity.
ggplot(
  subset(diamonds, volume <= vol99q),
  aes(volume, price)
) +
  geom_point(aes(color = clarity), alpha = 1 / 5) +
  scale_y_log10()
Your task is to create a new variable called ‘prop_initiated’ in the Pseudo-Facebook data set. The variable should contain the proportion of friendships that the user initiated.
# Load the tab-separated pseudo-Facebook data set.
pf <- read.delim("pseudo_facebook.tsv")

# Share of a user's friendships that the user initiated; users with no
# friends get 0 rather than a division by zero.
pf$prop_initiated <- ifelse(
  pf$friend_count > 0,
  pf$friendships_initiated / pf$friend_count,
  0
)
Create a line graph of the median proportion of friendships initiated (‘prop_initiated’) vs. tenure and color the line segment by year_joined.bucket.
# Year joined is derived from tenure (in days) counted back from 2014.
pf$year_joined <- floor(2014 - pf$tenure / 365)

# Bin the join year into the intervals (2004,2009], (2009,2011],
# (2011,2012] and (2012,2014].
pf$year_joined.bucket <- cut(
  pf$year_joined,
  breaks = c(2004, 2009, 2011, 2012, 2014)
)

# Median prop_initiated at each tenure, one line per join-year bucket.
# Only rows with prop_initiated > 0 are plotted.
ggplot(
  subset(pf, prop_initiated > 0),
  aes(tenure, prop_initiated, color = year_joined.bucket)
) +
  geom_line(stat = "summary", fun.y = median)
## Warning: Removed 2 rows containing non-finite values (stat_summary).
Smooth the last plot you created of prop_initiated vs tenure colored by year_joined.bucket.
# Median prop_initiated vs. tenure per join-year bucket, with a smoother
# overlaid for each bucket.
# The color mapping lives at the plot level so that geom_smooth() inherits
# it; when it is set only inside geom_line(), the smoother ignores the
# buckets and fits a single pooled curve.
ggplot(
  data = subset(pf, prop_initiated > 0),
  aes(x = tenure, y = prop_initiated, color = year_joined.bucket)
) +
  geom_line(stat = "summary", fun.y = median) +
  geom_smooth()
## Warning: Removed 2 rows containing non-finite values (stat_summary).
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
# Users in the most recent join-year bucket.
prop_initiated.2012.2014 <- filter(pf, year_joined.bucket == "(2012,2014]")

# Mean proportion of friendships initiated within that bucket.
with(prop_initiated.2012.2014, mean(prop_initiated))
## [1] 0.6430155
Create a scatter plot of the price/carat ratio of diamonds. Map cut to the x-axis, color the points by diamond color, and facet the plot by clarity.
# Price per carat for each cut, jittered to reduce overplotting, colored by
# diamond color and split into one panel per clarity grade.
# Rows with carat <= 0 are dropped before the ratio is computed.
ggplot(
  subset(diamonds, carat > 0),
  aes(cut, price / carat, color = color)
) +
  geom_jitter() +
  facet_wrap(~ clarity) +
  scale_color_brewer(type = "div")
library(tidyr)
# Read the three per-person electricity indicator tables. Strings are kept as
# character so the Country column can be compared directly between tables.
res.e.usage.pp <- read.csv(
  file = "Indicator_Residential electricity consumption per person.csv",
  stringsAsFactors = FALSE,
  row.names = NULL
)
e.gen.pp <- read.csv(
  file = "Electricity Generation per capita.csv",
  stringsAsFactors = FALSE,
  row.names = NULL
)
e.nuclear.pp <- read.csv(
  file = "Indicator_Nuclear production per capita.csv",
  stringsAsFactors = FALSE,
  row.names = NULL
)

# Countries present in all three tables.
countryList <- Reduce(
  intersect,
  list(res.e.usage.pp$Country, e.gen.pp$Country, e.nuclear.pp$Country)
)
# Reshape each indicator table from wide (one column per year) to long
# (Country, Year, value), keeping only the countries shared by all tables.
# The gathered column names become the Year values; characters 2-5 are kept
# to strip the one-character prefix
# (presumably the "X" read.csv adds to numeric headers; verify against the CSVs).
res.e.usage.pp.tidy <- res.e.usage.pp %>%
  filter(Country %in% countryList) %>%
  gather("Year", "kwH.residential", 2:50) %>%
  mutate(Year = substr(Year, 2, 5))

e.gen.pp.tidy <- e.gen.pp %>%
  filter(Country %in% countryList) %>%
  gather("Year", "kwH.generated", 2:20) %>%
  mutate(Year = substr(Year, 2, 5))

e.nuclear.pp.tidy <- e.nuclear.pp %>%
  filter(Country %in% countryList) %>%
  gather("Year", "kwH.nuclear", 2:53) %>%
  mutate(Year = substr(Year, 2, 5))
# Combine the three long tables on their shared columns, then keep only the
# country-years where all three measurements are present.
e.all.tidy <- res.e.usage.pp.tidy %>%
  left_join(e.gen.pp.tidy) %>%
  left_join(e.nuclear.pp.tidy) %>%
  filter(
    !is.na(kwH.residential),
    !is.na(kwH.generated),
    !is.na(kwH.nuclear)
  )
## Joining by: c("Country", "Year")
## Joining by: c("Country", "Year")
# Residential consumption per person by year, one panel per country, with
# points shaded by electricity generated per capita.
ggplot(e.all.tidy, aes(Year, kwH.residential, color = kwH.generated)) +
  geom_point() +
  facet_wrap(~ Country)
# Residential consumption per person by year, one panel per country, with
# points shaded by nuclear production per capita.
ggplot(e.all.tidy, aes(Year, kwH.residential, color = kwH.nuclear)) +
  geom_point() +
  facet_wrap(~ Country)